Define the model:

# JAGS model: open-population N-mixture model.
# At each time step k, m[k] new individuals arrive (Poisson with rate
# lambda), z[k-1] of the previous n[k-1] individuals survive (Binomial with
# survival probability delta), and y[k] of the n[k] present individuals are
# detected (Binomial with detection probability rho). The priors are
# commented out because lambda, rho and delta are passed in as data below.
model_str <- "
  model {
    # Define the priors
    #lambda ~ dgamma(1, 1)                  # average number of new arrivals
    #rho ~ dbeta(1, 1)                      # detection probability
    #delta ~ dbeta(1, 1)                    # survival probability
  
    # Initialize the model for k = 1
    m[1] ~ dpois(lambda)                    # number of new arrivals
    n[1] <- m[1]                            # abundance
    y[1] ~ dbin(rho, n[1])                  # number of observed individuals
  
    # Define the model for k > 1
    for (k in 2:K) {
        z[k - 1] ~ dbin(delta, n[k - 1])
        m[k] ~ dpois(lambda)
        n[k] <- m[k] + z[k - 1]
        y[k] ~ dbin(rho, n[k])
    }
  }
"
# Wrap the model string in a connection so jags.model() can read it as if it
# were a file on disk.
model_conn <- textConnection(model_str)

Run JAGS and obtain samples:

library(rjags)

# Libraries for plotting
library(dplyr)
library(tidyr)
library(lazyeval)
library(ggplot2)

# Fixed quantities for the simulation ------------------------------------
K      <- 10                 # number of time steps
n_iter <- 10000              # MCMC iterations to draw
lambda <- 10                 # rate of new arrivals
delta  <- 0.5                # survival probability
rho    <- 0.8                # detection probability

# Observed counts, one per time step
y <- c(10, 11, 7, 15, 13, 17, 13, 13, 18, 15)

# Initial values for the latent arrival counts m[k]: start the chain at the
# observed counts themselves.
m0 <- y

# Run JAGS
# lambda, delta and rho are supplied as DATA (the priors in the model string
# stay commented out), so only the latent m, n, z are sampled.
model <- jags.model(model_conn,
                    data = list('y' = y, 'K' = K, 'lambda' = lambda,
                                'delta' = delta, 'rho' = rho),
                    inits = list('m' = m0), # 'z' = z0
                    n.chains = 1,
                    n.adapt = 0)  # skip the adaptation phase entirely
# Time the sampling run; thin = 20 keeps n_iter / 20 = 500 samples.
# NOTE(review): `t` masks base::t() from here on; the elapsed time t[3] is
# reused below to build a per-sample time axis, so keep the name in sync.
t <- system.time(
  samples <- coda.samples(model, c('n', 'm', 'z'), n.iter = n_iter, thin = 20)
)

Plot the cumulative mean vs iteration for each variable n, m and z at each time step k:

# Turn mcmc object into a dataframe
# Spread the total elapsed time t[3] uniformly over the retained (thinned)
# samples to get an approximate wall-clock timestamp per stored iteration.
t_vec <- cumsum(rep(t[3] / nrow(samples[[1]]), nrow(samples[[1]])))
samples_df <- data.frame(samples[[1]], iter = 1:nrow(samples[[1]]), time = t_vec)

# Remove periods from column names
# (coda names like "n[3]" become "n.3." via data.frame(); stripping the dots
# leaves "n3", so the first character is the variable name and the rest is k.)
colnames(samples_df) <- gsub('\\.', '', colnames(samples_df))

# Preprocess the data frame for ggplot
# NOTE(review): gather() stacks the value columns one after another
# (column-major order); the matrix() reshaping further below relies on this
# exact row order, so do NOT swap this for pivot_longer(), which interleaves
# rows per original row instead.
samples_df <- samples_df %>%
  gather(var_k, value, -iter, -time) %>%
  separate(var_k, into = c('var', 'k'), sep = 1) %>%  # "n3" -> var "n", k "3"
  group_by(var, k) %>%
  mutate(cum_mean = cumsum(value) / iter)  # running mean per variable/step
samples_df$k <- factor(samples_df$k, levels = 1:K)

# Plot each variable
# Plot the running mean of each monitored variable, one line per time step k.
for (var in c('n', 'm', 'z')) {
  # filter_() and lazyeval::interp() are deprecated; use tidy-eval pronouns
  # to disambiguate the `var` COLUMN (.data) from the loop variable (.env).
  samples_var <- samples_df %>%
    filter(.data$var == .env$var)
  plt <- ggplot(samples_var, aes(iter, cum_mean, colour = k)) +
    geom_line() +
    theme_bw() +
    ggtitle(paste('Mean vs iteration of', var))
  print(plt)
}

Discard samples from iteration \(\leq\) 100:

# Drop the first 100 retained iterations as burn-in and recompute the running
# mean over the remaining samples. samples_df is still grouped by (var, k),
# so the cumulative sums are taken within each variable/time-step group.
burn_in <- 100
samples_df_new <- samples_df %>%
  filter(iter > burn_in) %>%
  mutate(cum_mean = cumsum(value) / (iter - burn_in))

Compare the new sample means of n with their true means:

# Ground-truth posterior means of n[k], computed externally.
true_means <- c(11.8206, 13.6123, 11.0323, 17.8103, 17.1073, 20.2344, 17.0239, 17.1461, 21.3691, 19.2281)
samples_n <- samples_df_new %>%
  filter(var == 'n')

# Absolute difference at each time step: the final running mean (row with
# the largest iter in each group) is the overall post-burn-in sample mean.
# Use a name that does not mask base::summary().
mean_comparison <- samples_n %>% filter(iter == max(iter)) %>%
  select(var, k, sample_mean = cum_mean)
mean_comparison$true_mean <- true_means
mean_comparison$diff <- abs(mean_comparison$sample_mean - mean_comparison$true_mean)
knitr::kable(mean_comparison)
var k sample_mean true_mean diff
n 1 11.9000 11.8206 0.0794
n 2 13.6100 13.6123 0.0023
n 3 10.9675 11.0323 0.0648
n 4 17.7550 17.8103 0.0553
n 5 17.0950 17.1073 0.0123
n 6 20.3100 20.2344 0.0756
n 7 16.9650 17.0239 0.0589
n 8 17.1025 17.1461 0.0436
n 9 21.4625 21.3691 0.0934
n 10 19.1525 19.2281 0.0756
# MSE vs running time
# Reshape the running means into an (iterations x K) matrix. This relies on
# samples_n rows being stacked k-block after k-block (gather's column-major
# stacking upstream) so that matrix()'s column-wise fill yields one column
# per time step k.
sample_means <- matrix(samples_n$cum_mean, nrow = length(unique(samples_n$iter)))
# Mean squared error of the K running means against the true means, one
# value per retained iteration.
mse <- rowSums(sweep(sample_means, 2, true_means, '-')^2)/length(true_means)
mse_df <- data.frame(mse, time = sort(unique(samples_n$time)))
ggplot(mse_df, aes(time, mse)) +
  geom_line() +
  theme_bw() +
  xlab('time in seconds') +
  ggtitle('MSE vs running time')

Plot the distribution of n at each time step:

# Post-burn-in samples of the abundance n only.
samples_n <- samples_df_new %>%
  filter(var == 'n')
# Ground-truth marginals; presumably one column per time step k with 40 rows
# covering the support 0..39 -- TODO confirm against how the file was written.
true_marg <- read.csv('../data/true_marg.csv', header = FALSE)
for (k in 1:K) {
  # Loop variable k vs factor column k: interp() substitutes the loop value,
  # and factor == numeric compares via the factor's character labels here.
  samples_nk <- samples_n %>%
    filter_(interp(~ k == t_step, t_step = k))
  
  # Histogram of samples
  x_range <- min(samples_nk$value):max(samples_nk$value)
  hist_plt_title <- bquote("Sample count of" ~ n[.(k)])
  hist_plt <- ggplot(samples_nk, aes(value)) +
    geom_bar() +
    scale_x_continuous(breaks = x_range) +
    theme_bw() +
    ggtitle(hist_plt_title)
  print(hist_plt)
  
  # Density of samples
  dist_plt_title <- bquote("Sample posterior marginal of" ~ n[.(k)])
  dist_plt <- ggplot(samples_nk, aes(value, ..density..)) +
    geom_freqpoly(binwidth = 1, na.rm = TRUE) +
    geom_vline(aes(xintercept = mean(value)),
               color = 'red', linetype = 'dashed') +
    xlim(0, 40) +
    theme_bw() +
    ggtitle(dist_plt_title)
  print(dist_plt)
  
  # MSE vs running time
  # Build a 40 x n_samples one-hot matrix: column i has a single 1 in row
  # n[i] + 1 (support assumed to be 0..39). The linear index
  # nrow * (i - 1) + n + 1 addresses (row n[i]+1, column i) in column-major
  # order.
  sample_marg <- matrix(0, 40, nrow(samples_nk))
  n <- samples_nk$value
  sample_marg[nrow(sample_marg) * ((1:length(n)) - 1) + n + 1] <- 1
  sample_marg <- t(sample_marg)
  # Row i of the cumulative sums holds the counts over the first i samples;
  # dividing by the row sums (recycled down the columns, matching rows)
  # turns each row into the empirical marginal after i samples.
  sample_marg <- apply(sample_marg, 2, cumsum)
  norm <- apply(sample_marg, 1, sum)
  sample_marg <- sample_marg / norm
  
  mse_plt_title <- bquote("MSE vs running time of" ~ n[.(k)])
  # NOTE(review): the squared errors are summed over the 40 support points,
  # but divided by length(true_marg), i.e. the NUMBER OF COLUMNS of the
  # data frame -- verify whether nrow(true_marg) (= 40) was intended.
  mse <- rowSums(sweep(sample_marg, 2, true_marg[, k], '-')^2)/length(true_marg)
  mse_df <- data.frame(mse, time = samples_nk$time)
  mse_plt <- ggplot(mse_df, aes(time, mse)) +
    geom_line() +
    theme_bw() +
    xlab('time in seconds') +
    ggtitle(mse_plt_title)
  print(mse_plt)
}

Parameter estimation

params <- c('lambda', 'delta', 'rho')
true_params <- c(lambda, delta, rho)
names(true_params) <- params  # enables lookup of the true value by NAME

# Load samples
# Long-format CSV with (at least) columns iter, time, chain, param, value,
# plus one column per estimator -- presumably cum_mean/cum_mode; TODO confirm.
samples_df <- read.csv('../data/param_est/samples_iter10m_thin1k.csv')

# Preprocess df for ggplot
# Stack the estimator columns into (estimator, estimate) pairs.
# NOTE(review): time is divided by 60 but the plots below label the axis
# 'time in hours' -- verify the CSV's time unit (seconds would yield minutes).
est_df <- samples_df %>% na.omit() %>%
  gather(estimator, estimate, -iter, -time, -param, -chain, -value) %>%
  select(time, chain, param, estimator, estimate) %>%
  mutate(time = time / 60)
den_df <- samples_df %>% select(chain, param, value)

for (param in params) {
  # Resolve the true parameter value ONCE, by name, from the loop variable.
  # Indexing true_params inside mutate()/aes() with the data column `param`
  # is wrong whenever read.csv() made that column a factor: subscripting a
  # named vector with a factor uses the factor's integer CODES, not its
  # labels (delta picks up lambda's value, etc. -- the error_mean values in
  # the pasted output table show exactly this bug).
  truth <- true_params[[param]]

  # filter_()/lazyeval::interp() are deprecated; use tidy-eval pronouns to
  # tell the `param` COLUMN (.data) apart from the loop variable (.env).
  est_param <- est_df %>%
    filter(.data$param == .env$param) %>%
    mutate(diff = abs(estimate - truth))
  den_param <- den_df %>%
    filter(.data$param == .env$param)
  
  # Plot mean and mode vs running time
  est_plt <- ggplot(est_param,
                    aes(time, estimate, colour = estimator, linetype = as.factor(chain))) +
    geom_line() +
    theme_bw() +
    xlab('time in hours') +
    ggtitle(paste('Estimate vs running time of', param))
  print(est_plt)

  # Plot error vs running time
  error_plt <- ggplot(est_param,
                      aes(time, diff, colour = estimator, linetype = as.factor(chain))) +
    geom_line() +
    theme_bw() +
    xlab('time in hours') +
    ggtitle(paste('Error vs running time of', param))
  print(error_plt)

  # Plot density of samples with the true value marked in red
  d_plt <- ggplot(den_param, aes(value, linetype = as.factor(chain))) +
    geom_density(bw = 'SJ') +
    geom_vline(xintercept = truth,
               color = 'red', linetype = 'dashed') +
    theme_bw() +
    ggtitle(paste('Density of', param))
  print(d_plt)
}

# Final running mean / mode per chain and parameter, with absolute errors.
# BUG FIX: `param` inside mutate() is the data column; when read.csv() made
# it a factor, true_params[param] subscripted the named vector by the
# factor's integer CODES rather than by name (so delta was compared against
# lambda's true value of 10, giving errors like 9.58). Converting to
# character forces lookup by name.
summary <- samples_df %>%
  filter(iter == max(iter)) %>%
  select(chain, param, cum_mean, cum_mode) %>%
  mutate(error_mean = abs(cum_mean - true_params[as.character(param)]),
         error_mode = abs(cum_mode - true_params[as.character(param)]))
knitr::kable(summary)
chain param cum_mean cum_mode error_mean error_mode
1 delta 0.4223884 0.4310938 9.5776116 9.5689062
1 lambda 338.1901069 59.3803590 337.6901069 58.8803590
1 rho 0.0506652 0.0080672 0.7493348 0.7919328
2 delta 0.4237948 0.4366568 9.5762052 9.5633432
2 lambda 429.1740418 60.9619648 428.6740418 60.4619648
2 rho 0.0372176 0.0078886 0.7627824 0.7921114